This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
library(data.table)
library(tidyr)
library(maps)
library(haven)
library(ggplot2)
library(dplyr)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
library(readxl)
hardship_complete <- read_excel("/Users/cristinacandido/Documents/Github/risk_wvs/data/Hardship_complete.xlsx")
hardship_complete
NA
NA
hardship_complete$mean_homicide=log(hardship_complete$mean_homicide)
hardship_complete$gdp=log(hardship_complete$gdp)
hardship_complete$Infant_mortality=log(hardship_complete$Infant_mortality)
hardship_complete$life_expect=log(hardship_complete$life_expect)
hardship_complete$gini_income=log(hardship_complete$gini_income)
hardship_complete$`primary_ female_ enrollment_ rate`=log(hardship_complete$`primary_ female_ enrollment_ rate`)
hardship_complete
# Reverse Codierung
hardship_complete$mean_homicide=scale(hardship_complete$mean_homicide)
hardship_complete$gdp=scale(-hardship_complete$gdp)
hardship_complete$Infant_mortality=scale(hardship_complete$Infant_mortality)
hardship_complete$life_expect=scale(-hardship_complete$life_expect)
hardship_complete$gini_income=scale(hardship_complete$gini_income)
hardship_complete$gini_income=scale(-hardship_complete$`primary_ female_ enrollment_ rate`)
hardship_complete
hardship_complete$hardship_index=(hardship_complete$mean_homicide+hardship_complete$gdp+hardship_complete$gini_income+hardship_complete$life_expect+hardship_complete$Infant_mortality+hardship_complete$`primary_ female_ enrollment_ rate`)/6
hardship_complete
NA
NA
# Data of Wave 5
WV5_data <- readRDS("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/F00007944-WV5_Data_R_v20180912.rds")
# Convert WV5_data-object in data.frame
WV5_data_df <- as.data.frame(WV5_data)
# show first five columns
WV5_data_df
#rename the variables
WV5_data <- WV5_data_df %>%
rename(gender = V235, age = V237, country_code = V2, wave = V1, risktaking = V86, children = V56, married = V55, employed = V241, education = V238)
WV5_data
colnames(WV5_data)
[1] "wave" "V1A" "V1B" "country_code" "V2A" "V3" "V4" "V4_CO" "V5"
[10] "V5_CO" "V6" "V6_CO" "V7" "V7_CO" "V8" "V8_CO" "V9" "V9_CO"
[19] "V10" "V11" "V12" "V13" "V14" "V15" "V16" "V17" "V18"
[28] "V19" "V20" "V21" "V22" "V23" "V24" "V25" "V26" "V27"
[37] "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36"
[46] "V37" "V38" "V39" "V40" "V41" "V42" "V43" "V43_01" "V43_02"
[55] "V43_03" "V43_04" "V43_05" "V43_06" "V43_07" "V43_08" "V43_09" "V43_10" "V43_11"
[64] "V43_12" "V43_13" "V43_14" "V43_15" "V43_16" "V43_17" "V43_18" "V43_19" "V43_20"
[73] "V43_21" "V43_22" "V43_23" "V43_24" "V43_25" "V43_26" "V43_27" "V43_28" "V43_29"
[82] "V43_30" "V44" "V45" "V46" "V47" "V48" "V49" "V50" "V51"
[91] "V52" "V53" "V54" "married" "children" "V57" "V58" "V59" "V60"
[100] "V61" "V62" "V63" "V64" "V65" "V66" "V67" "V68" "V69"
[109] "V69_HK" "V70" "V70_HK" "V71" "V72" "V73" "V73_HK" "V74" "V74_HK"
[118] "V75" "V76" "V77" "V78" "V79" "V80" "V81" "V82" "V83"
[127] "V84" "V85" "risktaking" "V87" "V88" "V89" "V90" "V91" "V92"
[136] "V93" "V94" "V95" "V96" "V97" "V98" "V99" "V100" "V101"
[145] "V102" "V103" "V104" "V105" "V106" "V107" "V108" "V109" "V110"
[154] "V111" "V112" "V113" "V114" "V115" "V116" "V117" "V118" "V119"
[163] "V120" "V121" "V122" "V123" "V124" "V125" "V126" "V127" "V128"
[172] "V129" "V130" "V130_CA_1" "V130_IQ_1" "V130_IQ_2" "V130_IQ_3" "V130_IQ_4" "V130_NZ_1" "V130_NZ_2"
[181] "V131" "V132" "V133" "V134" "V135" "V136" "V137" "V138" "V139"
[190] "V140" "V141" "V142" "V143" "V144" "V145" "V146_00" "V146_01" "V146_02"
[199] "V146_03" "V146_04" "V146_05" "V146_06" "V146_07" "V146_08" "V146_09" "V146_10" "V146_11"
[208] "V146_12" "V146_13" "V146_14" "V146_15" "V146_16" "V146_17" "V146_18" "V146_19" "V146_20"
[217] "V146_21" "V146_22" "V147" "V148" "V149" "V150" "V151" "V151_IQ_A" "V151_IQ_B"
[226] "V152" "V153" "V154" "V155" "V156" "V157" "V158" "V159" "V160"
[235] "V161" "V162" "V163" "V164" "V165" "V166" "V167" "V168" "V169"
[244] "V170" "V171" "V172" "V173" "V174" "V175" "V176" "V177" "V178"
[253] "V179" "V180" "V181" "V182" "V183" "V184" "V185" "V186" "V187"
[262] "V188" "V189" "V190" "V191" "V192" "V193" "V194" "V195" "V196"
[271] "V197" "V198" "V199" "V200" "V201" "V202" "V203" "V204" "V205"
[280] "V206" "V207" "V208" "V209" "V210" "V211" "V212" "V213A" "V213B"
[289] "V213C" "V213D" "V213E" "V213F" "V213G" "V213H" "V213K" "V213L" "V213M"
[298] "V213N" "V214" "V215" "V216" "V217" "V218" "V219" "V220" "V221"
[307] "V222" "V223" "V224" "V225" "V226" "V227" "V228" "V229" "V230"
[316] "V231" "V232" "V233" "V233A" "V234" "gender" "V236" "age" "education"
[325] "V238CS" "V239" "V240" "employed" "V242" "V242A_CO" "V243" "V244" "V245"
[334] "V246" "V247" "V248" "V249" "V250" "V251" "V252" "V252B" "V253"
[343] "V253CS" "V254" "V255" "V255CS" "V256" "V257" "V257B" "V257C" "V258"
[352] "V259" "V259A" "V260" "V261" "V262" "V263" "V264" "V265" "S024"
[361] "S025" "Y001" "Y002" "Y003" "SACSECVAL" "SECVALWGT" "RESEMAVAL" "WEIGHTB" "I_AUTHORITY"
[370] "I_NATIONALISM" "I_DEVOUT" "DEFIANCE" "WEIGHT1A" "I_RELIGIMP" "I_RELIGBEL" "I_RELIGPRAC" "DISBELIEF" "WEIGHT2A"
[379] "I_NORM1" "I_NORM2" "I_NORM3" "RELATIVISM" "WEIGHT3A" "I_TRUSTARMY" "I_TRUSTPOLICE" "I_TRUSTCOURTS" "SCEPTICISM"
[388] "WEIGHT4A" "I_INDEP" "I_IMAGIN" "I_NONOBED" "AUTONOMY" "WEIGHT1B" "I_WOMJOB" "I_WOMPOL" "I_WOMEDU"
[397] "EQUALITY" "WEIGHT2B" "I_HOMOLIB" "I_ABORTLIB" "I_DIVORLIB" "CHOICE" "WEIGHT3B" "I_VOICE1" "I_VOICE2"
[406] "I_VOI2_00" "VOICE" "WEIGHT4B" "S001" "S007" "S018" "S019" "S021" "COW"
#select only the variables of interest
WV5_data <- WV5_data %>%
dplyr::select(gender, age, country_code, wave, risktaking, children, married, employed, education)
WV5_data
# Read countrynames data from the CSV file (to decode the dataset 5)
countrynames <- read.csv("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt", header = FALSE, as.is = TRUE)
colnames(countrynames) <- c("code", "name")
# Assuming WV5_data has a column named country_code
WV5_data$country <- countrynames$name[match(WV5_data$country_code, countrynames$code)]
# Check the frequency of each country in the new column
table(WV5_data$country)
Andorra Argentina Australia Brazil Bulgaria Burkina Faso Canada Chile
1003 1002 1421 1500 1001 1534 2164 1000
China Colombia Cyprus (G) Egypt Ethiopia Finland France Georgia
1991 3025 1050 3051 1500 1014 1001 1500
Germany Ghana Great Britain Guatemala Hong Kong Hungary India Indonesia
2064 1534 1041 1000 1252 1007 2001 2015
Iran Iraq Italy Japan Jordan Malaysia Mali Mexico
2667 2701 1012 1096 1200 1201 1534 1560
Moldova Morocco Netherlands New Zealand Norway Peru Poland Romania
1046 1200 1050 954 1025 1500 1000 1776
Russia Rwanda Slovenia South Africa South Korea Spain Sweden Switzerland
2033 1507 1037 2988 1200 1200 1003 1241
Taiwan Thailand Trinidad and Tobago Turkey Ukraine United States Uruguay Viet Nam
1227 1534 1002 1346 1000 1249 1000 1495
Zambia
1500
# Display the updated WV5_data
print(WV5_data)
#Read Dataset (Wave 6)
WV6_data <- load("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
WV6_data <- WV6_data %>%
rename(wave = V1, gender = V240, age = V242,country_code = V2, risktaking = V76, children = V58, married = V57, employed = V229, education = V248)
#select only the variables of interest
WV6_data <- WV6_data %>%
dplyr::select(wave, gender, age, country_code,risktaking, children, married, employed, education)
WV6_data
countrynames = read.csv("/Users/cristinacandido/Documents/Github/risk_wvs/data/WVS/countrynames.txt", header=FALSE,as.is=TRUE)
colnames(countrynames) = c("code", "name")
WV6_data$country = countrynames$name [match(WV6_data$country_code, countrynames$code)]
table(WV6_data$country)
Algeria Argentina Armenia Australia Azerbaijan Belarus Brazil Chile
1200 1030 1100 1477 1002 1535 1486 1000
China Colombia Cyprus (G) Ecuador Egypt Estonia Georgia Germany
2300 1512 1000 1202 1523 1533 1202 2046
Ghana Haiti Hong Kong India Iraq Japan Jordan Kazakhstan
1552 1996 1000 4078 1200 2443 1200 1500
Kuwait Kyrgyzstan Lebanon Libya Malaysia Mexico Morocco Netherlands
1303 1500 1200 2131 1300 2000 1200 1902
New Zealand Nigeria Pakistan Palestine Peru Philippines Poland Qatar
841 1759 1200 1000 1210 1200 966 1060
Romania Russia Rwanda Singapore Slovenia South Africa South Korea Spain
1503 2500 1527 1972 1069 3531 1200 1189
Sweden Taiwan Thailand Trinidad and Tobago Tunisia Turkey Ukraine United States
1206 1238 1200 999 1205 1605 1500 2232
Uruguay Uzbekistan Yemen Zimbabwe
1000 1500 1000 1500
WV6_data
range(WVS_data$age)
[1] 15 99
WVS_data = subset(WVS_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
data_Wave5 = subset(WV5_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
data_Wave6 = subset(WV6_data, risktaking > 0 & gender > 0 & age >0 & education > 0 & employed > 0 & married > 0 & children >= 0)
WVS_data <- na.omit(WVS_data)
data_Wave5 <- na.omit(data_Wave5)
data_Wave6 <- na.omit(data_Wave6)
# Use the mutate function to change the country name
WVS_data <- WVS_data %>%
mutate(country = ifelse(country == "Great Britain", "United Kingdom", country))
# Transfrom risk item such that high values represent more risk taking
WVS_data$risktaking = 6 - WVS_data$risktaking + 1
# Transform risk variable into T-score (mean = 50, sd = 10)
WVS_data$T_score_risktaking = 10*scale(WVS_data$risktaking, center=TRUE,scale=TRUE)+50
WVS_data
#Transform risk variable into Z score
# Assuming T-scores have a mean of 50 and a standard deviation of 10
WVS_data$Z_score_risktaking = (WVS_data$T_score_risktaking - 50) / 10
# Print the resulting data frame
print(WVS_data)
WVS_data <- WVS_data %>%
group_by(country) %>%
mutate(z_score_age = scale(age))
WVS_data
WVS_data$gender = ifelse(WVS_data$gender == 1, 0, 1) # sex: male vs. female
WVS_data$children = ifelse(WVS_data$children == 0, 0, 1) # children: no vs. yes
WVS_data$married = ifelse(WVS_data$married == 1, 1, 0) # married: yes vs. no
WVS_data$employed = ifelse(WVS_data$employed < 4, 1, 0) # employed: yes vs. no
WVS_data$education = ifelse(WVS_data$education < 4, 0, 1) # education: no primary vs. primary+
hardship <- hardship_complete %>%
dplyr::select(country, isocode, hardship_index)
hardship
WVS_mixed_model <- left_join(WVS_data, hardship, by = "country")
WVS_mixed_model
head(WVS_mixed_model)
library(lmerTest)
Attaching package: ‘lmerTest’
The following object is masked from ‘package:lme4’:
lmer
The following object is masked from ‘package:stats’:
step
# intercept only model
model0 = lmer(Z_score_risktaking ~ 1 + (1|country),data = WVS_mixed_model)
summary_model0=summary(model0)
summary_model0
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: Z_score_risktaking ~ 1 + (1 | country)
Data: WVS_mixed_model
REML criterion at convergence: 406605.8
Scaled residuals:
Min 1Q Median 3Q Max
-2.3179 -0.8254 -0.0722 0.7614 2.5318
Random effects:
Groups Name Variance Std.Dev.
country (Intercept) 0.08622 0.2936
Residual 0.90218 0.9498
Number of obs: 148527, groups: country, 76
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.01243 0.03379 75.10025 0.368 0.714
# age, sex
model1 = lmer(T_score_risktaking ~ 1 +scale(z_score_age)+factor(gender) + (1+scale(z_score_age)+factor(gender)|country),data = WVS_mixed_model)
summary_model1=summary(model1)
summary_model1
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: T_score_risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country)
Data: WVS_mixed_model
REML criterion at convergence: 1081381
Scaled residuals:
Min 1Q Median 3Q Max
-2.4657 -0.7816 -0.0765 0.7536 3.2215
Random effects:
Groups Name Variance Std.Dev. Corr
country (Intercept) 7.6729 2.7700
scale(z_score_age) 0.8133 0.9018 0.39
factor(gender)1 0.8723 0.9340 0.19 0.33
Residual 84.6041 9.1981
Number of obs: 148527, groups: country, 76
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 51.3360 0.3200 75.2652 160.42 <2e-16 ***
scale(z_score_age) -1.9148 0.1068 74.8609 -17.93 <2e-16 ***
factor(gender)1 -2.3166 0.1193 72.3578 -19.43 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__)
scl(z_scr_) 0.373
fctr(gndr)1 0.135 0.287
#model 2
library(lme4)
library(lme4)
# Define the lmer model and assign it to 'model_2'
model_2 <- lmer(T_score_risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) +
(1 + scale(z_score_age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) | country),
data = WVS_data)
# Display the summary of the model
summary(model_2)
anova(model0,model1)
refitting model(s) with ML (instead of REML)
Data: WVS_mixed_model
Models:
model0: Z_score_risktaking ~ 1 + (1 | country)
model1: T_score_risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model0 3 406607 406637 -203300 406601
model1 10 1081396 1081495 -540688 1081376 0 7 1
anova(model1,model_2)
refitting model(s) with ML (instead of REML)
coefsallmodels=rbind(summary_model1$coefficients,
summary_model_2$coefficients,
summary_model_3$coefficients[c(1:2,4:8,3,9:10),])
write.csv(coefsallmodels,"coefsallmodels.csv")
library(dplyr)
# Assuming your dataset is named 'data' and contains columns: location, age, cause, val
# Step 1: Filter data for a specific mental disorder (e.g., Anxiety Disorders)
filtered_data <- gbd_mentalhealth %>%
filter(cause == "Anxiety disorders") # Change "Anxiety Disorders" to the desired disorder
# Step 2: Calculate the mean of 'val' (Disability-Adjusted Life Years) across locations and ages
mean_DALYs <- mean(filtered_data$val)
# Optionally, if you want to calculate mean by country:
mean_by_country <- filtered_data %>%
group_by(country) %>%
summarise(mean_Anxiety_disorders = mean(val))
# View the mean DALYs for the filtered disorder
print(mean_DALYs)
[1] 2637.681
# View the mean DALYs by country for the filtered disorder
print(mean_by_country)
NA
```